In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
!pip install plotly
import plotly.express as px
import numpy as np
import geopandas as gpd
from shapely import wkt
from shapely.geometry import Point
!pip install -q folium mapclassify
!pip install -q plotly
import folium
from folium.plugins import MarkerCluster
from IPython.display import display
from IPython.display import HTML
Requirement already satisfied: plotly in /opt/conda/lib/python3.10/site-packages (5.19.0)
Requirement already satisfied: tenacity>=6.2.0 in /opt/conda/lib/python3.10/site-packages (from plotly) (8.2.3)
Requirement already satisfied: packaging in /opt/conda/lib/python3.10/site-packages (from plotly) (23.2)
In [2]:
"""Imports the WA Electric Vehicle and 2010 Census Race Data by Legislative District datasets"""
# Read in EV dataset
ev = pd.read_csv('ev.csv')
# Read in census data
leg = pd.read_csv('legislative.csv')
In [3]:
dirty_charging_df = pd.read_csv("EV_Charging_Stations.csv")

clean_charging_df = dirty_charging_df[['Fuel Type Code', 'Station Name', 'Street Address',
       'City', 'State', 'ZIP', 'Station Phone', 'Groups With Access Code',
       'Access Days Time', 'EV Network', 'Geocode Status',
       'Latitude', 'Longitude', 'Date Last Confirmed',
       'ID', 'Updated At', 'EV Connector Types', 'Country',
       'Access Code', 'EV Workplace Charging']]
In [4]:
ev_df = pd.read_csv("Clean_EV_Population.csv")
charging_df = pd.read_csv("Clean_EV_Charging_Stations.csv")
racial_df = pd.read_csv("Clean_Legislative.csv")
state_data = gpd.read_file("tl_2010_53_tract00.shp")
In [5]:
charging_df['geometry'] = [Point(xy) for xy in zip(charging_df.Longitude, charging_df.Latitude)]
In [6]:
racial_df['Legislative District'] = pd.to_numeric(racial_df['Legislative District'], errors='coerce')
In [7]:
"""Cleans the EV dataset"""
# Dropping NA values in the Vehicle Location and Legislative District columns
ev.dropna(subset=['Vehicle Location', 'Legislative District'], inplace=True)
# Legislative District column in EV dataset was a float type. This converts the column to a string type
ev['Legislative District'] = ev['Legislative District'].astype(int)
ev['Legislative District'] = ev['Legislative District'].astype(str)
In [8]:
"""Percent EV ownership per legislative district"""
# Counts the number of electric vehicles in each legislative district, then calculates the sum of all the EVs
# in the data set
evs_per_district = ev.groupby('Legislative District').size()
total_evs = evs_per_district.sum()
# Calulcating the percentage of EVs per district out of all EVs in the state, rounding to 2 decimal points
rates = (evs_per_district / total_evs) * 100
rates = rates.round(2)
# Resets the index to aid in merging the two data frames
rates = rates.reset_index()
rates.columns = ['Legislative District', 'Percentage EV Ownership']

# Having an issue with the merge function creating duplicate 'Percentage EV Ownership' columns when
# the cell was run multiple times. This drops the column in the EV dataset to avoid this issue.
if 'Percentage EV Ownership' in ev:
    ev.drop(columns='Percentage EV Ownership', inplace=True)

ev = pd.merge(ev, rates, on='Legislative District', how='left')

# Testing the Percentage EV Ownership calculations on a small subset of the dataframe
rates_subset = rates[rates['Legislative District'].isin(['5', '6', '7', '8', '9'])]
rates_subset = rates_subset.reset_index(drop=True)
expected_output1 = pd.DataFrame({'Legislative District': ['5', '6', '7', '8', '9'], 'Percentage EV Ownership': [4.24, 0.96, 0.47, 1.04, 0.56]})
assert rates_subset.equals(expected_output1), 'Test failed'
print('Test passed')
ev
Test passed
Out[8]:
VIN (1-10) County City State Postal Code Model Year Make Model Electric Vehicle Type Clean Alternative Fuel Vehicle (CAFV) Eligibility Electric Range Base MSRP Legislative District DOL Vehicle ID Vehicle Location Electric Utility 2020 Census Tract Percentage EV Ownership
0 3C3CFFGE4E Yakima Yakima WA 98902.0 2014 FIAT 500 Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 87 0 14 1593721 POINT (-120.524012 46.5973939) PACIFICORP 5.307700e+10 0.65
1 5YJXCBE40H Thurston Olympia WA 98513.0 2017 TESLA MODEL X Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 200 0 2 257167501 POINT (-122.817545 46.98876) PUGET SOUND ENERGY INC 5.306701e+10 1.13
2 3MW39FS03P King Renton WA 98058.0 2023 BMW 330E Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range 20 0 11 224071816 POINT (-122.1298876 47.4451257) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 5.303303e+10 3.97
3 7PDSGABA8P Snohomish Bothell WA 98012.0 2023 RIVIAN R1S Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... 0 0 21 260084653 POINT (-122.1873 47.820245) PUGET SOUND ENERGY INC 5.306105e+10 2.50
4 5YJ3E1EB8L King Kent WA 98031.0 2020 TESLA MODEL 3 Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 322 0 33 253771913 POINT (-122.2012521 47.3931814) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 5.303303e+10 2.03
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
166430 3FA6P0SU4D Spokane Spokane WA 99223.0 2013 FORD FUSION Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range 19 0 6 239527123 POINT (-117.369705 47.62637) BONNEVILLE POWER ADMINISTRATION||AVISTA CORP||... 5.306300e+10 0.96
166431 5YJYGDEE5M King Sammamish WA 98074.0 2021 TESLA MODEL Y Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... 0 0 45 148715479 POINT (-122.0313266 47.6285782) PUGET SOUND ENERGY INC||CITY OF TACOMA - (WA) 5.303303e+10 6.05
166432 7SAYGDEE5N Snohomish Mukilteo WA 98275.0 2022 TESLA MODEL Y Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... 0 0 21 220504406 POINT (-122.299965 47.94171) PUGET SOUND ENERGY INC 5.306104e+10 2.50
166433 1G1RH6E43D Lewis Mossyrock WA 98564.0 2013 CHEVROLET VOLT Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible 38 0 20 156418475 POINT (-122.487535 46.5290135) BONNEVILLE POWER ADMINISTRATION||CITY OF TACOM... 5.304197e+10 0.90
166434 5YJSA1E27H Pierce Gig Harbor WA 98332.0 2017 TESLA MODEL S Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible 210 0 26 169045789 POINT (-122.589645 47.342345) BONNEVILLE POWER ADMINISTRATION||CITY OF TACOM... 5.305307e+10 2.00

166435 rows × 18 columns

In [9]:
"""Cleaning Legislative District dataset"""
# Removing the 0 in front of the single digit legislative districts, then making a copy of the data frame with only certain columns
leg['Legislative District'] = leg['Legislative District'].str.lstrip('0')
subset = leg[['Legislative District', 'Total Population 2010', 'White Alone 2010', 'Black or African American Alone 2010', 
             'American Indian and Alaska Native Alone 2010', 'Asian Alone 2010', 'Native Hawaiian and Other Pacific Islander Alone 2010',
             'Some Other Race Alone 2010', 'Population of Two or More Races 2010']].copy()
subset
Out[9]:
Legislative District Total Population 2010 White Alone 2010 Black or African American Alone 2010 American Indian and Alaska Native Alone 2010 Asian Alone 2010 Native Hawaiian and Other Pacific Islander Alone 2010 Some Other Race Alone 2010 Population of Two or More Races 2010
0 Washington State 6724540 5196362 240042 103869 481067 40475 349799 312926
1 10 137233 119541 2004 1948 4275 513 3870 5082
2 11 137278 71456 15773 1122 30275 1384 9082 8186
3 12 137285 107156 467 4599 990 175 20029 3869
4 13 137280 108807 1344 1434 1640 131 19459 4465
5 14 137218 101108 1410 6702 1792 133 20918 5155
6 15 137224 81973 1047 4541 1011 106 43623 4923
7 16 137198 99538 1966 1241 1484 257 28740 3972
8 17 137230 114836 3088 1160 6967 1195 3936 6048
9 18 137217 122591 1255 947 5725 354 1769 4576
10 19 137232 119320 1196 3164 2303 320 5632 5297
11 20 137204 123839 772 1929 1202 239 4704 4519
12 21 137260 97656 5213 1159 19384 782 6187 6879
13 22 137239 111921 4016 1494 8589 1115 2601 7503
14 23 137217 111882 3350 2521 8193 1106 2193 7972
15 24 137280 120267 1058 6218 1920 221 2481 5115
16 25 137235 106168 6047 2074 8050 1646 4765 8485
17 26 137274 118577 3038 1568 4438 1093 1733 6827
18 27 137224 93814 13627 2726 10136 1254 5407 10260
19 28 137216 98047 11895 1416 9357 1924 3598 10979
20 29 137257 78163 17608 2035 12565 4404 9652 12830
21 30 137261 86079 10944 1494 16655 3064 10428 8597
22 31 137214 117744 2072 3064 4177 715 3812 5630
23 32 137225 96732 7011 1236 19614 581 5090 6961
24 33 137237 76183 15672 1769 18059 3563 13675 8316
25 34 137208 96382 8523 1557 14499 1214 7648 7385
26 35 137219 119040 2032 3147 3220 638 3163 5979
27 36 137247 114573 3725 1007 9529 298 2052 6063
28 37 137192 54545 31408 1174 36167 903 4957 8038
29 38 137242 105267 4140 4619 8138 839 7044 7195
30 39 137201 120439 1414 1982 2704 353 5493 4816
31 40 137259 117134 1303 1533 3807 255 8129 5098
32 41 137250 95315 2559 402 30936 269 2270 5499
33 42 137231 115238 1195 5107 4798 367 5573 4953
34 43 137252 107212 4961 860 15187 290 2058 6684
35 44 137246 112605 2308 1070 11220 456 3301 6286
36 45 137281 108934 1660 527 17449 206 2970 5535
37 46 137253 104001 5446 934 16449 414 2899 7110
38 47 137210 90810 9587 1103 19594 1637 6625 7854
39 48 137226 91704 2784 543 32355 261 4229 5350
40 49 137244 113137 3995 1370 4707 1142 6630 6263
41 1 137236 108239 2426 865 15785 417 3564 5940
42 2 137202 110085 4987 2254 5768 1532 3762 8814
43 3 137226 116887 3756 3294 3436 810 2036 7007
44 4 137281 126031 1273 1485 2366 298 1636 4192
45 5 137210 116500 1567 865 10306 275 2330 5367
46 6 137198 121721 2681 1840 3479 694 1784 4999
47 7 137263 121820 647 6208 1143 199 2746 4500
48 8 137202 113603 2047 1200 4323 223 10628 5178
49 9 137223 111742 1745 1362 4901 210 12888 4375
In [10]:
"""Legislative District racial distributions"""
# Selects the race columns in the subset dataframe and creates a new column for each race that calculates the percent of that race out
# of the total population in each legislative district. Rounds the percent to two decimal points
for column in subset.columns[2:9]:
    subset.loc[:, column + ' Percentage'] = (subset[column] / subset['Total Population 2010']) * 100
    subset = subset.round(2)
    
# Testing the race percentage calculations on a small subset of the dataframe
subset_test = subset[subset['Legislative District'].isin(['10', '11', '12', '13', '14'])]
columns = ['Legislative District', 'White Alone 2010 Percentage', 'Black or African American Alone 2010 Percentage', 
           'American Indian and Alaska Native Alone 2010 Percentage', 'Asian Alone 2010 Percentage', 
           'Native Hawaiian and Other Pacific Islander Alone 2010 Percentage', 'Some Other Race Alone 2010 Percentage', 
           'Population of Two or More Races 2010 Percentage']
subset_test = subset_test.reset_index(drop=True)
expected_output2 = pd.DataFrame({'Legislative District': ['10', '11', '12', '13', '14'], 'White Alone 2010 Percentage': [87.11, 52.05, 78.05, 79.26, 73.68], 
                                 'Black or African American Alone 2010 Percentage': [1.46, 11.49, 0.34, 0.98, 1.03], 
                                 'American Indian and Alaska Native Alone 2010 Percentage': [1.42, 0.82, 3.35, 1.04, 4.88], 
                                 'Asian Alone 2010 Percentage': [3.12, 22.05, 0.72, 1.19, 1.31], 
                                 'Native Hawaiian and Other Pacific Islander Alone 2010 Percentage': [0.37, 1.01, 0.13, 0.10, 0.10], 
                                 'Some Other Race Alone 2010 Percentage': [2.82, 6.62, 14.59, 14.17, 15.24], 
                                 'Population of Two or More Races 2010 Percentage': [3.70, 5.96, 2.82, 3.25, 3.76]})
assert subset_test[columns].equals(expected_output2), 'Test failed'
print('Test passed')
subset
Test passed
Out[10]:
Legislative District Total Population 2010 White Alone 2010 Black or African American Alone 2010 American Indian and Alaska Native Alone 2010 Asian Alone 2010 Native Hawaiian and Other Pacific Islander Alone 2010 Some Other Race Alone 2010 Population of Two or More Races 2010 White Alone 2010 Percentage Black or African American Alone 2010 Percentage American Indian and Alaska Native Alone 2010 Percentage Asian Alone 2010 Percentage Native Hawaiian and Other Pacific Islander Alone 2010 Percentage Some Other Race Alone 2010 Percentage Population of Two or More Races 2010 Percentage
0 Washington State 6724540 5196362 240042 103869 481067 40475 349799 312926 77.27 3.57 1.54 7.15 0.60 5.20 4.65
1 10 137233 119541 2004 1948 4275 513 3870 5082 87.11 1.46 1.42 3.12 0.37 2.82 3.70
2 11 137278 71456 15773 1122 30275 1384 9082 8186 52.05 11.49 0.82 22.05 1.01 6.62 5.96
3 12 137285 107156 467 4599 990 175 20029 3869 78.05 0.34 3.35 0.72 0.13 14.59 2.82
4 13 137280 108807 1344 1434 1640 131 19459 4465 79.26 0.98 1.04 1.19 0.10 14.17 3.25
5 14 137218 101108 1410 6702 1792 133 20918 5155 73.68 1.03 4.88 1.31 0.10 15.24 3.76
6 15 137224 81973 1047 4541 1011 106 43623 4923 59.74 0.76 3.31 0.74 0.08 31.79 3.59
7 16 137198 99538 1966 1241 1484 257 28740 3972 72.55 1.43 0.90 1.08 0.19 20.95 2.90
8 17 137230 114836 3088 1160 6967 1195 3936 6048 83.68 2.25 0.85 5.08 0.87 2.87 4.41
9 18 137217 122591 1255 947 5725 354 1769 4576 89.34 0.91 0.69 4.17 0.26 1.29 3.33
10 19 137232 119320 1196 3164 2303 320 5632 5297 86.95 0.87 2.31 1.68 0.23 4.10 3.86
11 20 137204 123839 772 1929 1202 239 4704 4519 90.26 0.56 1.41 0.88 0.17 3.43 3.29
12 21 137260 97656 5213 1159 19384 782 6187 6879 71.15 3.80 0.84 14.12 0.57 4.51 5.01
13 22 137239 111921 4016 1494 8589 1115 2601 7503 81.55 2.93 1.09 6.26 0.81 1.90 5.47
14 23 137217 111882 3350 2521 8193 1106 2193 7972 81.54 2.44 1.84 5.97 0.81 1.60 5.81
15 24 137280 120267 1058 6218 1920 221 2481 5115 87.61 0.77 4.53 1.40 0.16 1.81 3.73
16 25 137235 106168 6047 2074 8050 1646 4765 8485 77.36 4.41 1.51 5.87 1.20 3.47 6.18
17 26 137274 118577 3038 1568 4438 1093 1733 6827 86.38 2.21 1.14 3.23 0.80 1.26 4.97
18 27 137224 93814 13627 2726 10136 1254 5407 10260 68.37 9.93 1.99 7.39 0.91 3.94 7.48
19 28 137216 98047 11895 1416 9357 1924 3598 10979 71.45 8.67 1.03 6.82 1.40 2.62 8.00
20 29 137257 78163 17608 2035 12565 4404 9652 12830 56.95 12.83 1.48 9.15 3.21 7.03 9.35
21 30 137261 86079 10944 1494 16655 3064 10428 8597 62.71 7.97 1.09 12.13 2.23 7.60 6.26
22 31 137214 117744 2072 3064 4177 715 3812 5630 85.81 1.51 2.23 3.04 0.52 2.78 4.10
23 32 137225 96732 7011 1236 19614 581 5090 6961 70.49 5.11 0.90 14.29 0.42 3.71 5.07
24 33 137237 76183 15672 1769 18059 3563 13675 8316 55.51 11.42 1.29 13.16 2.60 9.96 6.06
25 34 137208 96382 8523 1557 14499 1214 7648 7385 70.25 6.21 1.13 10.57 0.88 5.57 5.38
26 35 137219 119040 2032 3147 3220 638 3163 5979 86.75 1.48 2.29 2.35 0.46 2.31 4.36
27 36 137247 114573 3725 1007 9529 298 2052 6063 83.48 2.71 0.73 6.94 0.22 1.50 4.42
28 37 137192 54545 31408 1174 36167 903 4957 8038 39.76 22.89 0.86 26.36 0.66 3.61 5.86
29 38 137242 105267 4140 4619 8138 839 7044 7195 76.70 3.02 3.37 5.93 0.61 5.13 5.24
30 39 137201 120439 1414 1982 2704 353 5493 4816 87.78 1.03 1.44 1.97 0.26 4.00 3.51
31 40 137259 117134 1303 1533 3807 255 8129 5098 85.34 0.95 1.12 2.77 0.19 5.92 3.71
32 41 137250 95315 2559 402 30936 269 2270 5499 69.45 1.86 0.29 22.54 0.20 1.65 4.01
33 42 137231 115238 1195 5107 4798 367 5573 4953 83.97 0.87 3.72 3.50 0.27 4.06 3.61
34 43 137252 107212 4961 860 15187 290 2058 6684 78.11 3.61 0.63 11.07 0.21 1.50 4.87
35 44 137246 112605 2308 1070 11220 456 3301 6286 82.05 1.68 0.78 8.18 0.33 2.41 4.58
36 45 137281 108934 1660 527 17449 206 2970 5535 79.35 1.21 0.38 12.71 0.15 2.16 4.03
37 46 137253 104001 5446 934 16449 414 2899 7110 75.77 3.97 0.68 11.98 0.30 2.11 5.18
38 47 137210 90810 9587 1103 19594 1637 6625 7854 66.18 6.99 0.80 14.28 1.19 4.83 5.72
39 48 137226 91704 2784 543 32355 261 4229 5350 66.83 2.03 0.40 23.58 0.19 3.08 3.90
40 49 137244 113137 3995 1370 4707 1142 6630 6263 82.43 2.91 1.00 3.43 0.83 4.83 4.56
41 1 137236 108239 2426 865 15785 417 3564 5940 78.87 1.77 0.63 11.50 0.30 2.60 4.33
42 2 137202 110085 4987 2254 5768 1532 3762 8814 80.24 3.63 1.64 4.20 1.12 2.74 6.42
43 3 137226 116887 3756 3294 3436 810 2036 7007 85.18 2.74 2.40 2.50 0.59 1.48 5.11
44 4 137281 126031 1273 1485 2366 298 1636 4192 91.81 0.93 1.08 1.72 0.22 1.19 3.05
45 5 137210 116500 1567 865 10306 275 2330 5367 84.91 1.14 0.63 7.51 0.20 1.70 3.91
46 6 137198 121721 2681 1840 3479 694 1784 4999 88.72 1.95 1.34 2.54 0.51 1.30 3.64
47 7 137263 121820 647 6208 1143 199 2746 4500 88.75 0.47 4.52 0.83 0.14 2.00 3.28
48 8 137202 113603 2047 1200 4323 223 10628 5178 82.80 1.49 0.87 3.15 0.16 7.75 3.77
49 9 137223 111742 1745 1362 4901 210 12888 4375 81.43 1.27 0.99 3.57 0.15 9.39 3.19
In [11]:
"""Merging EV and Legislative District datasets"""
ev_leg = pd.merge(ev, subset, on='Legislative District')
ev_leg
Out[11]:
VIN (1-10) County City State Postal Code Model Year Make Model Electric Vehicle Type Clean Alternative Fuel Vehicle (CAFV) Eligibility ... Native Hawaiian and Other Pacific Islander Alone 2010 Some Other Race Alone 2010 Population of Two or More Races 2010 White Alone 2010 Percentage Black or African American Alone 2010 Percentage American Indian and Alaska Native Alone 2010 Percentage Asian Alone 2010 Percentage Native Hawaiian and Other Pacific Islander Alone 2010 Percentage Some Other Race Alone 2010 Percentage Population of Two or More Races 2010 Percentage
0 3C3CFFGE4E Yakima Yakima WA 98902.0 2014 FIAT 500 Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible ... 133 20918 5155 73.68 1.03 4.88 1.31 0.10 15.24 3.76
1 5YJ3E1EA5L Yakima Yakima WA 98902.0 2020 TESLA MODEL 3 Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible ... 133 20918 5155 73.68 1.03 4.88 1.31 0.10 15.24 3.76
2 JA4T5VA98P Yakima Yakima WA 98908.0 2023 MITSUBISHI OUTLANDER Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible ... 133 20918 5155 73.68 1.03 4.88 1.31 0.10 15.24 3.76
3 5YJ3E1EA7K Yakima Tieton WA 98947.0 2019 TESLA MODEL 3 Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible ... 133 20918 5155 73.68 1.03 4.88 1.31 0.10 15.24 3.76
4 1C4JJXR64N Yakima Naches WA 98937.0 2022 JEEP WRANGLER Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range ... 133 20918 5155 73.68 1.03 4.88 1.31 0.10 15.24 3.76
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
166430 KNDC3DLC2P Whatcom Bellingham WA 98225.0 2023 KIA EV6 Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... ... 367 5573 4953 83.97 0.87 3.72 3.50 0.27 4.06 3.61
166431 1N4AZ1CP6J Whatcom Ferndale WA 98248.0 2018 NISSAN LEAF Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible ... 367 5573 4953 83.97 0.87 3.72 3.50 0.27 4.06 3.61
166432 KM8K33AG8L Whatcom Bellingham WA 98226.0 2020 HYUNDAI KONA Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible ... 367 5573 4953 83.97 0.87 3.72 3.50 0.27 4.06 3.61
166433 7SAXCAE56P Whatcom Blaine WA 98230.0 2023 TESLA MODEL X Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... ... 367 5573 4953 83.97 0.87 3.72 3.50 0.27 4.06 3.61
166434 1C4RJXN60R Whatcom Bellingham WA 98225.0 2024 JEEP WRANGLER Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range ... 367 5573 4953 83.97 0.87 3.72 3.50 0.27 4.06 3.61

166435 rows × 33 columns

In [12]:
def merge_data(csv1_path, csv2_path):
    ev_data = pd.read_csv(csv1_path)
    racial_data = pd.read_csv(csv2_path)

    ev_racial_data = ev_data.merge(racial_data, how="left", left_on="Legislative District", right_on="Legislative District")
    ev_racial_data = ev_racial_data.drop(["Unnamed: 0_x", "Unnamed: 0_y"], axis=1) # noticed the merge automatically includes
    # these columns so we need to remove them

    return ev_racial_data

ev_racial_data = merge_data("Clean_EV_Population.csv", "Clean_Legislative.csv")
display(ev_racial_data)
assert type(ev_racial_data) == pd.DataFrame
assert list(ev_racial_data.columns) == ['VIN (1-10)', 'County', 'City', 'State', 'Postal Code', 'Model Year',
       'Make', 'Model', 'Electric Vehicle Type',
       'Clean Alternative Fuel Vehicle (CAFV) Eligibility', 'Electric Range',
       'Base MSRP', 'Legislative District', 'DOL Vehicle ID',
       'Vehicle Location', 'Electric Utility', '2020 Census Tract', 'geometry',
       'Total Population 2010', 'Population of One Race 2010',
       'White Alone 2010', 'Black or African American Alone 2010',
       'American Indian and Alaska Native Alone 2010', 'Asian Alone 2010',
       'Native Hawaiian and Other Pacific Islander Alone 2010',
       'Some Other Race Alone 2010', 'Population of Two or More Races 2010',
       'Hispanic or Latino (of Any Race) 2010', 'Not Hispanic or Latino 2010',
       'Population of One Race Not Hispanic or Latino 2010',
       'White Alone Not Hispanic or Latino 2010',
       'Black or African American Alone Not Hispanic or Latino 2010',
       'American Indian and Alaska Native Alone Not Hispanic or Latino 2010',
       'Asian Alone Not Hispanic or Latino 2010',
       'Native Hawaiian and Other Pacific Islander Alone Not Hispanic or Latino 2010',
       'Some Other Race Alone Not Hispanic or Latino 2010',
       'Population of Two or More Races Not Hispanic or Latino 2010',
       'Total Population Over 18 2010', 'Population of One Race Over 18 2010',
       'White Alone Over 18 2010',
       'Black or African American Alone Over 18 2010',
       'American Indian and Alaska Native Alone Over 18 2010',
       'Asian Alone Over 18 2010',
       'Native Hawaiian and Other Pacific Islander Alone Over 18 2010',
       'Some Other Race Alone Over 18 2010',
       'Population of Two or More Races Over 18 2010',
       'Hispanic or Latino (of Any Race) Over 18 2010',
       'Not Hispanic or Latino Over 18 2010',
       'Population of One Race Not Hispanic or Latino Over 18 2010',
       'White Alone Not Hispanic or Latino Over 18 2010',
       'Black or African American Alone Not Hispanic or Latino Over 18 2010',
       'American Indian and Alaska Native Alone Not Hispanic or Latino Over 18 2010',
       'Asian Alone Not Hispanic or Latino Over 18 2010',
       'Native Hawaiian and Other Pacific Islander Alone Not Hispanic or Latino Over 18 2010',
       'Some Other Race Alone Not Hispanic or Latino Over 18 2010',
       'Population of Two or More Races Not Hispanic or Latino Over 18 2010'
]
assert len(ev_racial_data) == 166790
VIN (1-10) County City State Postal Code Model Year Make Model Electric Vehicle Type Clean Alternative Fuel Vehicle (CAFV) Eligibility ... Hispanic or Latino (of Any Race) Over 18 2010 Not Hispanic or Latino Over 18 2010 Population of One Race Not Hispanic or Latino Over 18 2010 White Alone Not Hispanic or Latino Over 18 2010 Black or African American Alone Not Hispanic or Latino Over 18 2010 American Indian and Alaska Native Alone Not Hispanic or Latino Over 18 2010 Asian Alone Not Hispanic or Latino Over 18 2010 Native Hawaiian and Other Pacific Islander Alone Not Hispanic or Latino Over 18 2010 Some Other Race Alone Not Hispanic or Latino Over 18 2010 Population of Two or More Races Not Hispanic or Latino Over 18 2010
0 3C3CFFGE4E Yakima Yakima WA 98902.0 2014 FIAT 500 Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible ... 21616 79487 77744 71356 907 3979 1312 85 105 1743
1 5YJXCBE40H Thurston Olympia WA 98513.0 2017 TESLA MODEL X Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible ... 6102 93558 90163 79905 3364 1366 4367 1002 159 3395
2 3MW39FS03P King Renton WA 98058.0 2023 BMW 330E Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range ... 11908 92174 88859 53220 10828 621 23089 944 157 3315
3 7PDSGABA8P Snohomish Bothell WA 98012.0 2023 RIVIAN R1S Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... ... 8755 96811 94026 74239 3605 738 14743 548 153 2785
4 5YJ3E1EB8L King Kent WA 98031.0 2020 TESLA MODEL 3 Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible ... 15898 89387 86183 58131 10853 1010 13708 2256 225 3204
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
166785 3FA6P0SU4D Spokane Spokane WA 99223.0 2013 FORD FUSION Plug-in Hybrid Electric Vehicle (PHEV) Not eligible due to low battery range ... 4280 102421 100196 93650 2031 1227 2748 431 109 2225
166786 5YJYGDEE5M King Sammamish WA 98074.0 2021 TESLA MODEL Y Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... ... 5316 96646 94492 79983 1214 331 12492 144 328 2154
166787 7SAYGDEE5N Snohomish Mukilteo WA 98275.0 2022 TESLA MODEL Y Battery Electric Vehicle (BEV) Eligibility unknown as battery range has not b... ... 8755 96811 94026 74239 3605 738 14743 548 153 2785
166788 1G1RH6E43D Lewis Mossyrock WA 98564.0 2013 CHEVROLET VOLT Plug-in Hybrid Electric Vehicle (PHEV) Clean Alternative Fuel Vehicle Eligible ... 5858 98475 96452 93519 467 1305 918 166 77 2023
166789 5YJSA1E27H Pierce Gig Harbor WA 98332.0 2017 TESLA MODEL S Battery Electric Vehicle (BEV) Clean Alternative Fuel Vehicle Eligible ... 4927 101989 98964 91180 2367 1046 3480 758 133 3025

166790 rows × 56 columns

In [13]:
geometry1 = ev_racial_data["Vehicle Location"].apply(wkt.loads)
ev_racial_gdf = gpd.GeoDataFrame(ev_racial_data, geometry=geometry1, crs="EPSG:4326")

geometry2 = [Point(xy) for xy in zip(charging_df["Longitude"], charging_df["Latitude"])]
charging_gdf = gpd.GeoDataFrame(charging_df, geometry=geometry2, crs="EPSG:4326")

# Set CRS for GeoDataFrames to EPSG:4326
state_data.crs = 'EPSG:4326'

entire_state = state_data[["geometry"]].dissolve()
fig, ax = plt.subplots(figsize=(10, 5))
entire_state.plot(ax=ax, color="#EEE")
state_data.plot(ax=ax, facecolor="#CCCCCC" , edgecolor="black")

# Plot EV ownership
wa_data = gpd.sjoin(ev_racial_gdf, state_data, how='inner', predicate='intersects')
wa_data.plot(ax=ax, color="blue", marker="o", markersize=1, label="EV Ownership")

# Plot EV charging stations
charging_gdf.plot(ax=ax, color="red", marker="o", markersize=1, label="EV Charging Stations")

# Set title and turn off axis
ax.set_title("Washington with EV Charging Stations & EV Ownership")
ax.set_axis_off()

ax.legend()

plt.show()

assert type(ev_racial_gdf) == gpd.GeoDataFrame
assert type(charging_gdf) == gpd.GeoDataFrame
assert ax.get_title() == "Washington with EV Charging Stations & EV Ownership", "title does not match expected"
assert not ax.axison, "borders and labels must be hidden"
assert ax.legend() is not None, "missing legend"
No description has been provided for this image
In [14]:
# Perform spatial join between ev_racial_gdf and state_data
wa_data = gpd.sjoin(ev_racial_gdf, state_data, how='inner', predicate='intersects')

# Plot EV charging stations and EV ownership using Plotly Express
fig = px.scatter_mapbox(charging_gdf,
                        lat="Latitude",
                        lon="Longitude",
                        color_discrete_sequence=["red"],
                        hover_name="Station Name",
                        mapbox_style="carto-positron",
                        zoom=5,
                        opacity=0.7,
                        title="Washington with EV Charging Stations & EV Ownership")

fig.add_scattermapbox(lat=wa_data.geometry.y,
                      lon=wa_data.geometry.x,
                      mode="markers",
                      marker=dict(color="blue", size=4),
                      hoverinfo="text",
                      text="EV Home",
                      name="EV Ownership")

# Using renderer to display interactive map in Jupyter Notebook
fig.show(renderer='iframe')

# Show the interactive map
fig.show()
In [15]:
"""Interactive scatterplot comparing the percentage of white individuals (x) to the percentage of EV ownership in each
legislative district in WA"""
# Uses the plotlyexpress library to create the scatterplot
interactive = px.scatter(ev_leg, x='White Alone 2010 Percentage', y='Percentage EV Ownership', hover_data=['Legislative District'])
interactive.update_layout(title='Percentage of White Individuals vs Percentage of EV Ownership per WA Legislative District', 
                          xaxis_title='Percentage of White Individuals', 
                          yaxis_title='Percentage of EV Ownership')
# Using renderer to display interactive graph in Jupyter Notebook
interactive.show(renderer='iframe')
interactive.show()
In [16]:
"""Barplots showing racial distribution of legislative districts with highest and lowest percent EV ownership"""
# Locates the row with the highest and lowest percent EV ownership
high_ev_index = ev_leg['Percentage EV Ownership'].idxmax()
high_ev_district = ev_leg.loc[high_ev_index]
low_ev_index = ev_leg['Percentage EV Ownership'].idxmin()
low_ev_district = ev_leg.loc[low_ev_index]

# Creating dictionaries with the columns used to make the barplots
high_ev_data = {'Legislative District': high_ev_district['Legislative District'], 'Race': ['White', 'Black', 'Asian', 'Two or More Races', 'Some Other Race'],
                'Percentage': [high_ev_district['White Alone 2010 Percentage'], high_ev_district['Black or African American Alone 2010 Percentage'], 
                               high_ev_district['Asian Alone 2010 Percentage'], high_ev_district['Population of Two or More Races 2010 Percentage'],
                               high_ev_district['Some Other Race Alone 2010 Percentage']]}
low_ev_data = {'Legislative District': low_ev_district['Legislative District'], 'Race': ['White', 'Black', 'Asian', 'Two or More Races', 'Some Other Race'],
               'Percentage': [low_ev_district['White Alone 2010 Percentage'], low_ev_district['Black or African American Alone 2010 Percentage'], 
                              low_ev_district['Asian Alone 2010 Percentage'], low_ev_district['Population of Two or More Races 2010 Percentage'], 
                              low_ev_district['Some Other Race Alone 2010 Percentage']]}

# Converting the dictionaries into dataframes
high_ev_df = pd.DataFrame(high_ev_data)
low_ev_df = pd.DataFrame(low_ev_data)

# Creating the highest EV ownership race distribution barplot
sns.set(style='white')
sns.set_palette('Set2')
high_barplot = sns.catplot(data=high_ev_df, x='Legislative District', y='Percentage', hue='Race', kind='bar')
plt.xlabel('Legislative District 41')
plt.ylabel('Percentage of Race')
plt.title('Race Dist. of Legislative District with the Highest EV Ownership in WA')
# Making sure that the y-axes on both graphs are equal
plt.ylim(0, max(high_ev_df['Percentage']))
plt.show()

# Creating the lowest EV ownership race distribution barplot
sns.set(style='white')
sns.set_palette('Set2')
low_barplot = sns.catplot(data=low_ev_df, x='Legislative District', y='Percentage', hue='Race', kind='bar')
plt.xlabel('Legislative District 15')
plt.ylabel('Percentage of Race')
plt.title('Race Dist. of Legislative District with the Lowest EV Ownership in WA')
# Making sure that the y-axes on both graphs are equal
plt.ylim(0, max(high_ev_df['Percentage']))
plt.show()
No description has been provided for this image
No description has been provided for this image